{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e482a125-cdea-4098-afb4-81ee1a8e3d2b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Jupyter-Notebook-732-Crawler-Collect-PyPI-Href.ipynb\n",
    "# Environment: Python 3.12.0 + Jupyter Notebook 7.3.2\n",
    "# Created by GF 2025-02-16 00:58"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "a7884059-2e23-4651-b902-01b4c0306051",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import re\n",
    "# ..................................................\n",
    "import pandas\n",
    "# ..................................................\n",
    "import GF_PY312_CLASS_Crawler_by_BS4\n",
    "# ..................................................\n",
    "# Single crawler instance; the href-collection and download cells below all call methods on it.\n",
    "Crawler = GF_PY312_CLASS_Crawler_by_BS4.Crawler()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "02160f9d-121c-455a-bd16-b75d90230a66",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['../../packages/63/2b/01f5ed23a78391f6e3e73075973da0ecb467c831376a0b09c0ec5afd7977/Flask-0.11.1-py2.py3-none-any.whl#sha256=a4f97abd30d289e548434ef42317a793f58087be1989eab96f2c647470e77000',\n",
       " '../../packages/55/8a/78e165d30f0c8bb5d57c429a30ee5749825ed461ad6c959688872643ffb3/Flask-0.11.1.tar.gz#sha256=b4713f2bfb9ebc2966b8a49903ae0d3984781d5c878591cf2f7b484d28756b0e']"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Try the href collector on one fixed release (Flask 0.11.1) and display what it returns.\n",
    "flask_hrefs = Crawler.mirrors_tuna_tsinghua_edu_cn_Collect_PyPI_Href_20250324(\"Flask\", \"0.11.1\")\n",
    "flask_hrefs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "1e30b481-ae4a-4dc2-88f4-6677acb50c53",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>package</th>\n",
       "      <th>version</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>APScheduler</td>\n",
       "      <td>3.11.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Flask</td>\n",
       "      <td>3.1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Jinja2</td>\n",
       "      <td>3.1.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>MarkupSafe</td>\n",
       "      <td>3.0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>PyJWT</td>\n",
       "      <td>2.10.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>170</th>\n",
       "      <td>webencodings</td>\n",
       "      <td>0.5.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>171</th>\n",
       "      <td>websocket-client</td>\n",
       "      <td>1.8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>172</th>\n",
       "      <td>widgetsnbextension</td>\n",
       "      <td>4.0.13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>173</th>\n",
       "      <td>yarl</td>\n",
       "      <td>1.18.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>174</th>\n",
       "      <td>zipp</td>\n",
       "      <td>3.21.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>175 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                package version\n",
       "0           APScheduler  3.11.0\n",
       "1                 Flask   3.1.0\n",
       "2                Jinja2   3.1.6\n",
       "3            MarkupSafe   3.0.2\n",
       "4                 PyJWT  2.10.1\n",
       "..                  ...     ...\n",
       "170        webencodings   0.5.1\n",
       "171    websocket-client   1.8.0\n",
       "172  widgetsnbextension  4.0.13\n",
       "173                yarl  1.18.3\n",
       "174                zipp  3.21.0\n",
       "\n",
       "[175 rows x 2 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read package and version as text so a version such as \"1.10\" is not parsed as a number.\n",
    "PYPI_COLUMN_TYPES = {\"package\": str, \"version\": str}\n",
    "df = pandas.read_csv(\"./PyPI.csv\", dtype=PYPI_COLUMN_TYPES)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "78d88900-b8c8-459b-a805-806dada8d235",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>package</th>\n",
       "      <th>version</th>\n",
       "      <th>package_new</th>\n",
       "      <th>url</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>APScheduler</td>\n",
       "      <td>3.11.0</td>\n",
       "      <td>APScheduler</td>\n",
       "      <td>[../../packages/d0/ae/9a053dd9229c0fde6b1f1f33...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Flask</td>\n",
       "      <td>3.1.0</td>\n",
       "      <td>Flask</td>\n",
       "      <td>[../../packages/af/47/93213ee66ef8fae3b93b3e29...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Jinja2</td>\n",
       "      <td>3.1.6</td>\n",
       "      <td>Jinja2</td>\n",
       "      <td>[../../packages/62/a1/3d680cbfd5f4b8f15abc1d57...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>MarkupSafe</td>\n",
       "      <td>3.0.2</td>\n",
       "      <td>MarkupSafe</td>\n",
       "      <td>[../../packages/04/90/d08277ce111dd22f77149fd1...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>PyJWT</td>\n",
       "      <td>2.10.1</td>\n",
       "      <td>PyJWT</td>\n",
       "      <td>[../../packages/61/ad/689f02752eeec26aed679477...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>170</th>\n",
       "      <td>webencodings</td>\n",
       "      <td>0.5.1</td>\n",
       "      <td>webencodings</td>\n",
       "      <td>[../../packages/f4/24/2a3e3df732393fed8b3ebf2e...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>171</th>\n",
       "      <td>websocket-client</td>\n",
       "      <td>1.8.0</td>\n",
       "      <td>websocket-client</td>\n",
       "      <td>[../../packages/5a/84/44687a29792a70e111c5c477...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>172</th>\n",
       "      <td>widgetsnbextension</td>\n",
       "      <td>4.0.13</td>\n",
       "      <td>widgetsnbextension</td>\n",
       "      <td>[../../packages/21/02/88b65cc394961a60c43c7051...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>173</th>\n",
       "      <td>yarl</td>\n",
       "      <td>1.18.3</td>\n",
       "      <td>yarl</td>\n",
       "      <td>[../../packages/d2/98/e005bc608765a8a5569f58e6...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>174</th>\n",
       "      <td>zipp</td>\n",
       "      <td>3.21.0</td>\n",
       "      <td>zipp</td>\n",
       "      <td>[../../packages/b7/1a/7e4798e9339adc931158c9d6...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>175 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                package version         package_new  \\\n",
       "0           APScheduler  3.11.0         APScheduler   \n",
       "1                 Flask   3.1.0               Flask   \n",
       "2                Jinja2   3.1.6              Jinja2   \n",
       "3            MarkupSafe   3.0.2          MarkupSafe   \n",
       "4                 PyJWT  2.10.1               PyJWT   \n",
       "..                  ...     ...                 ...   \n",
       "170        webencodings   0.5.1        webencodings   \n",
       "171    websocket-client   1.8.0    websocket-client   \n",
       "172  widgetsnbextension  4.0.13  widgetsnbextension   \n",
       "173                yarl  1.18.3                yarl   \n",
       "174                zipp  3.21.0                zipp   \n",
       "\n",
       "                                                   url  \n",
       "0    [../../packages/d0/ae/9a053dd9229c0fde6b1f1f33...  \n",
       "1    [../../packages/af/47/93213ee66ef8fae3b93b3e29...  \n",
       "2    [../../packages/62/a1/3d680cbfd5f4b8f15abc1d57...  \n",
       "3    [../../packages/04/90/d08277ce111dd22f77149fd1...  \n",
       "4    [../../packages/61/ad/689f02752eeec26aed679477...  \n",
       "..                                                 ...  \n",
       "170  [../../packages/f4/24/2a3e3df732393fed8b3ebf2e...  \n",
       "171  [../../packages/5a/84/44687a29792a70e111c5c477...  \n",
       "172  [../../packages/21/02/88b65cc394961a60c43c7051...  \n",
       "173  [../../packages/d2/98/e005bc608765a8a5569f58e6...  \n",
       "174  [../../packages/b7/1a/7e4798e9339adc931158c9d6...  \n",
       "\n",
       "[175 rows x 4 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Replace underscores with hyphens in the package name before querying the mirror.\n",
    "# The vectorised .str accessor leaves a missing name as NaN instead of raising on it.\n",
    "df[\"package_new\"] = df[\"package\"].str.replace('_', '-', regex=False)\n",
    "# ..................................................\n",
    "# One crawler call per (package, version) row; each call returns the list of hrefs for that release.\n",
    "df[\"url\"] = [\n",
    "    Crawler.mirrors_tuna_tsinghua_edu_cn_Collect_PyPI_Href_20250324(name, version)\n",
    "    for name, version in zip(df[\"package_new\"], df[\"version\"])\n",
    "]\n",
    "# ..................................................\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "a53f9fcd-50e8-4926-8a40-d0514c63c121",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>package</th>\n",
       "      <th>version</th>\n",
       "      <th>package_new</th>\n",
       "      <th>url</th>\n",
       "      <th>file</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>APScheduler</td>\n",
       "      <td>3.11.0</td>\n",
       "      <td>APScheduler</td>\n",
       "      <td>../../packages/d0/ae/9a053dd9229c0fde6b1f1f33f...</td>\n",
       "      <td>APScheduler-3.11.0-py3-none-any.whl</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>APScheduler</td>\n",
       "      <td>3.11.0</td>\n",
       "      <td>APScheduler</td>\n",
       "      <td>../../packages/4e/00/6d6814ddc19be2df62c8c898c...</td>\n",
       "      <td>apscheduler-3.11.0.tar.gz</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Flask</td>\n",
       "      <td>3.1.0</td>\n",
       "      <td>Flask</td>\n",
       "      <td>../../packages/af/47/93213ee66ef8fae3b93b3e292...</td>\n",
       "      <td>flask-3.1.0-py3-none-any.whl</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Flask</td>\n",
       "      <td>3.1.0</td>\n",
       "      <td>Flask</td>\n",
       "      <td>../../packages/89/50/dff6380f1c7f84135484e176e...</td>\n",
       "      <td>flask-3.1.0.tar.gz</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Jinja2</td>\n",
       "      <td>3.1.6</td>\n",
       "      <td>Jinja2</td>\n",
       "      <td>../../packages/62/a1/3d680cbfd5f4b8f15abc1d571...</td>\n",
       "      <td>jinja2-3.1.6-py3-none-any.whl</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2458</th>\n",
       "      <td>yarl</td>\n",
       "      <td>1.18.3</td>\n",
       "      <td>yarl</td>\n",
       "      <td>../../packages/b2/fc/a8aef69156ad5508165d8ae95...</td>\n",
       "      <td>yarl-1.18.3-cp39-cp39-win_amd64.whl</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2459</th>\n",
       "      <td>yarl</td>\n",
       "      <td>1.18.3</td>\n",
       "      <td>yarl</td>\n",
       "      <td>../../packages/f5/4b/a06e0ec3d155924f77835ed2d...</td>\n",
       "      <td>yarl-1.18.3-py3-none-any.whl</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2460</th>\n",
       "      <td>yarl</td>\n",
       "      <td>1.18.3</td>\n",
       "      <td>yarl</td>\n",
       "      <td>../../packages/b7/9d/4b94a8e6d2b51b599516a5cb8...</td>\n",
       "      <td>yarl-1.18.3.tar.gz</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2461</th>\n",
       "      <td>zipp</td>\n",
       "      <td>3.21.0</td>\n",
       "      <td>zipp</td>\n",
       "      <td>../../packages/b7/1a/7e4798e9339adc931158c9d69...</td>\n",
       "      <td>zipp-3.21.0-py3-none-any.whl</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2462</th>\n",
       "      <td>zipp</td>\n",
       "      <td>3.21.0</td>\n",
       "      <td>zipp</td>\n",
       "      <td>../../packages/3f/50/bad581df71744867e9468ebd0...</td>\n",
       "      <td>zipp-3.21.0.tar.gz</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2463 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          package version  package_new  \\\n",
       "0     APScheduler  3.11.0  APScheduler   \n",
       "1     APScheduler  3.11.0  APScheduler   \n",
       "2           Flask   3.1.0        Flask   \n",
       "3           Flask   3.1.0        Flask   \n",
       "4          Jinja2   3.1.6       Jinja2   \n",
       "...           ...     ...          ...   \n",
       "2458         yarl  1.18.3         yarl   \n",
       "2459         yarl  1.18.3         yarl   \n",
       "2460         yarl  1.18.3         yarl   \n",
       "2461         zipp  3.21.0         zipp   \n",
       "2462         zipp  3.21.0         zipp   \n",
       "\n",
       "                                                    url  \\\n",
       "0     ../../packages/d0/ae/9a053dd9229c0fde6b1f1f33f...   \n",
       "1     ../../packages/4e/00/6d6814ddc19be2df62c8c898c...   \n",
       "2     ../../packages/af/47/93213ee66ef8fae3b93b3e292...   \n",
       "3     ../../packages/89/50/dff6380f1c7f84135484e176e...   \n",
       "4     ../../packages/62/a1/3d680cbfd5f4b8f15abc1d571...   \n",
       "...                                                 ...   \n",
       "2458  ../../packages/b2/fc/a8aef69156ad5508165d8ae95...   \n",
       "2459  ../../packages/f5/4b/a06e0ec3d155924f77835ed2d...   \n",
       "2460  ../../packages/b7/9d/4b94a8e6d2b51b599516a5cb8...   \n",
       "2461  ../../packages/b7/1a/7e4798e9339adc931158c9d69...   \n",
       "2462  ../../packages/3f/50/bad581df71744867e9468ebd0...   \n",
       "\n",
       "                                     file  \n",
       "0     APScheduler-3.11.0-py3-none-any.whl  \n",
       "1               apscheduler-3.11.0.tar.gz  \n",
       "2            flask-3.1.0-py3-none-any.whl  \n",
       "3                      flask-3.1.0.tar.gz  \n",
       "4           jinja2-3.1.6-py3-none-any.whl  \n",
       "...                                   ...  \n",
       "2458  yarl-1.18.3-cp39-cp39-win_amd64.whl  \n",
       "2459         yarl-1.18.3-py3-none-any.whl  \n",
       "2460                   yarl-1.18.3.tar.gz  \n",
       "2461         zipp-3.21.0-py3-none-any.whl  \n",
       "2462                   zipp-3.21.0.tar.gz  \n",
       "\n",
       "[2463 rows x 5 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# explode() gives every href in a row's list its own row; ignore_index=True renumbers the index afterwards.\n",
    "df = df.explode('url', ignore_index=True)\n",
    "# ..................................................\n",
    "# An empty href list explodes to a single NaN row. Report those packages and drop the rows,\n",
    "# otherwise the string handling below (and the later download step) fails on the missing value.\n",
    "missing_href = df[\"url\"].isna()\n",
    "if missing_href.any():\n",
    "    print(\"No href found for:\", sorted(df.loc[missing_href, \"package\"].unique()))\n",
    "    df = df[~missing_href].reset_index(drop=True)\n",
    "# ..................................................\n",
    "# File name = last path segment of the href with the trailing \"#sha256...\" fragment removed.\n",
    "df[\"file\"] = df[\"url\"].str.split('/').str[-1].str.replace(\"#sha256.*\", '', regex=True)\n",
    "# ..................................................\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "42c17519-5b97-4e69-b188-46e9fc4bc3bd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist only the file name and href columns; the index is not written.\n",
    "df.to_csv(\"./PyPI-Downloads.csv\", columns=[\"file\", \"url\"], index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "ac3328ae-0358-48e9-a23d-d62ba6c5c613",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ##################################################\n",
    "# Download PyPI Package\n",
    "# ##################################################"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "badec26a-135c-4b18-a066-32bb5e8efd0b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>file</th>\n",
       "      <th>url</th>\n",
       "      <th>url_complete</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>APScheduler-3.11.0-py3-none-any.whl</td>\n",
       "      <td>../../packages/d0/ae/9a053dd9229c0fde6b1f1f33f...</td>\n",
       "      <td>https://mirrors.tuna.tsinghua.edu.cn/pypi/web/...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>apscheduler-3.11.0.tar.gz</td>\n",
       "      <td>../../packages/4e/00/6d6814ddc19be2df62c8c898c...</td>\n",
       "      <td>https://mirrors.tuna.tsinghua.edu.cn/pypi/web/...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>flask-3.1.0-py3-none-any.whl</td>\n",
       "      <td>../../packages/af/47/93213ee66ef8fae3b93b3e292...</td>\n",
       "      <td>https://mirrors.tuna.tsinghua.edu.cn/pypi/web/...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>flask-3.1.0.tar.gz</td>\n",
       "      <td>../../packages/89/50/dff6380f1c7f84135484e176e...</td>\n",
       "      <td>https://mirrors.tuna.tsinghua.edu.cn/pypi/web/...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>jinja2-3.1.6-py3-none-any.whl</td>\n",
       "      <td>../../packages/62/a1/3d680cbfd5f4b8f15abc1d571...</td>\n",
       "      <td>https://mirrors.tuna.tsinghua.edu.cn/pypi/web/...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2458</th>\n",
       "      <td>yarl-1.18.3-cp39-cp39-win_amd64.whl</td>\n",
       "      <td>../../packages/b2/fc/a8aef69156ad5508165d8ae95...</td>\n",
       "      <td>https://mirrors.tuna.tsinghua.edu.cn/pypi/web/...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2459</th>\n",
       "      <td>yarl-1.18.3-py3-none-any.whl</td>\n",
       "      <td>../../packages/f5/4b/a06e0ec3d155924f77835ed2d...</td>\n",
       "      <td>https://mirrors.tuna.tsinghua.edu.cn/pypi/web/...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2460</th>\n",
       "      <td>yarl-1.18.3.tar.gz</td>\n",
       "      <td>../../packages/b7/9d/4b94a8e6d2b51b599516a5cb8...</td>\n",
       "      <td>https://mirrors.tuna.tsinghua.edu.cn/pypi/web/...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2461</th>\n",
       "      <td>zipp-3.21.0-py3-none-any.whl</td>\n",
       "      <td>../../packages/b7/1a/7e4798e9339adc931158c9d69...</td>\n",
       "      <td>https://mirrors.tuna.tsinghua.edu.cn/pypi/web/...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2462</th>\n",
       "      <td>zipp-3.21.0.tar.gz</td>\n",
       "      <td>../../packages/3f/50/bad581df71744867e9468ebd0...</td>\n",
       "      <td>https://mirrors.tuna.tsinghua.edu.cn/pypi/web/...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2463 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                     file  \\\n",
       "0     APScheduler-3.11.0-py3-none-any.whl   \n",
       "1               apscheduler-3.11.0.tar.gz   \n",
       "2            flask-3.1.0-py3-none-any.whl   \n",
       "3                      flask-3.1.0.tar.gz   \n",
       "4           jinja2-3.1.6-py3-none-any.whl   \n",
       "...                                   ...   \n",
       "2458  yarl-1.18.3-cp39-cp39-win_amd64.whl   \n",
       "2459         yarl-1.18.3-py3-none-any.whl   \n",
       "2460                   yarl-1.18.3.tar.gz   \n",
       "2461         zipp-3.21.0-py3-none-any.whl   \n",
       "2462                   zipp-3.21.0.tar.gz   \n",
       "\n",
       "                                                    url  \\\n",
       "0     ../../packages/d0/ae/9a053dd9229c0fde6b1f1f33f...   \n",
       "1     ../../packages/4e/00/6d6814ddc19be2df62c8c898c...   \n",
       "2     ../../packages/af/47/93213ee66ef8fae3b93b3e292...   \n",
       "3     ../../packages/89/50/dff6380f1c7f84135484e176e...   \n",
       "4     ../../packages/62/a1/3d680cbfd5f4b8f15abc1d571...   \n",
       "...                                                 ...   \n",
       "2458  ../../packages/b2/fc/a8aef69156ad5508165d8ae95...   \n",
       "2459  ../../packages/f5/4b/a06e0ec3d155924f77835ed2d...   \n",
       "2460  ../../packages/b7/9d/4b94a8e6d2b51b599516a5cb8...   \n",
       "2461  ../../packages/b7/1a/7e4798e9339adc931158c9d69...   \n",
       "2462  ../../packages/3f/50/bad581df71744867e9468ebd0...   \n",
       "\n",
       "                                           url_complete  \n",
       "0     https://mirrors.tuna.tsinghua.edu.cn/pypi/web/...  \n",
       "1     https://mirrors.tuna.tsinghua.edu.cn/pypi/web/...  \n",
       "2     https://mirrors.tuna.tsinghua.edu.cn/pypi/web/...  \n",
       "3     https://mirrors.tuna.tsinghua.edu.cn/pypi/web/...  \n",
       "4     https://mirrors.tuna.tsinghua.edu.cn/pypi/web/...  \n",
       "...                                                 ...  \n",
       "2458  https://mirrors.tuna.tsinghua.edu.cn/pypi/web/...  \n",
       "2459  https://mirrors.tuna.tsinghua.edu.cn/pypi/web/...  \n",
       "2460  https://mirrors.tuna.tsinghua.edu.cn/pypi/web/...  \n",
       "2461  https://mirrors.tuna.tsinghua.edu.cn/pypi/web/...  \n",
       "2462  https://mirrors.tuna.tsinghua.edu.cn/pypi/web/...  \n",
       "\n",
       "[2463 rows x 3 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The stored hrefs are relative (\"../../packages/...\"); swap that prefix for the mirror's web root.\n",
    "RELATIVE_PREFIX = \"../../\"\n",
    "MIRROR_WEB_ROOT = \"https://mirrors.tuna.tsinghua.edu.cn/pypi/web/\"\n",
    "# ..................................................\n",
    "df = pandas.read_csv(\"./PyPI-Downloads.csv\")\n",
    "# ..................................................\n",
    "df[\"url_complete\"] = [href.replace(RELATIVE_PREFIX, MIRROR_WEB_ROOT) for href in df[\"url\"]]\n",
    "# ..................................................\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "aa469cbe-8b8f-4b88-9dc9-98721be1de73",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       True\n",
       "1       True\n",
       "2       True\n",
       "3       True\n",
       "4       True\n",
       "        ... \n",
       "2458    True\n",
       "2459    True\n",
       "2460    True\n",
       "2461    True\n",
       "2462    True\n",
       "Length: 2463, dtype: bool"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pick entry 1 of the crawler's predefined header sets (presumably HTTP request headers; verify in the class).\n",
    "# NOTE(review): \"Hedaers\" looks like a typo of \"Headers\", but it must match the attribute names the\n",
    "# Crawler class itself uses, so confirm there before renaming anything.\n",
    "Crawler.Hedaers = Crawler.Some_Hedaers[1]\n",
    "# ..................................................\n",
    "# Create the target folder up front in case the download helper does not create it itself.\n",
    "DOWNLOAD_DIR = \"./downloads/\"\n",
    "os.makedirs(DOWNLOAD_DIR, exist_ok=True)\n",
    "# ..................................................\n",
    "# One download per row. The saved run shows the helper returning booleans\n",
    "# (presumably True on success; verify), so keep them to find failures afterwards.\n",
    "download_ok = pandas.Series(\n",
    "    [\n",
    "        bool(Crawler.Requests_2_x_Download_File(url, DOWNLOAD_DIR + file))\n",
    "        for url, file in zip(df[\"url_complete\"], df[\"file\"])\n",
    "    ],\n",
    "    index=df.index,\n",
    "    dtype=bool,\n",
    ")\n",
    "# ..................................................\n",
    "# Summarise, then show only the rows that did not download instead of the full boolean column.\n",
    "print(f\"{download_ok.sum()} of {len(download_ok)} files downloaded\")\n",
    "df[~download_ok]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "934e553f-5d55-43b2-ad05-dcfd3ccb0fb6",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
